import numpy as np
import pandas as pd
import csv
import plotly.graph_objects as go
import numpy as np
import plotly.express as px
# Trieda pre prehľadnosť výpisov v Jupyter notebooku.
class display(object):
    """Show several named objects side by side in a Jupyter notebook.

    Every positional argument is a *string* containing a Python expression,
    normally just a variable name. The string is used as the caption and is
    evaluated to obtain the object that is rendered under it.

    NOTE(security): the strings are passed to ``eval``. Only pass expressions
    written in the notebook itself, never text that comes from outside.
    """

    # One floating box per object: {0} is the caption, {1} the object's HTML.
    template = """<div style="float: left; padding: 10px;">
<p style = 'font-family:"Courier New", Courier, monospace'>{0}</p>{1}
</div>"""

    def __init__(self, *a):
        """Store the expression strings to be rendered."""
        self.args = a

    @staticmethod
    def _html_for(obj):
        """Return HTML for *obj*, falling back to an escaped ``<pre>`` repr.

        The fallback covers objects that have no ``_repr_html_`` method and
        objects whose ``_repr_html_`` returns ``None``.
        """
        import html

        render = getattr(obj, '_repr_html_', None)
        if callable(render):
            markup = render()
            if markup is not None:
                return markup
        return '<pre>{0}</pre>'.format(html.escape(repr(obj)))

    def _repr_html_(self):
        """Return the HTML boxes for all stored expressions, newline-joined."""
        return '\n'.join(self.template.format(a, self._html_for(eval(a)))
                         for a in self.args)

    def __repr__(self):
        """Return a plain-text version: each expression above its repr."""
        return '\n\n'.join(a + '\n' + repr(eval(a))
                           for a in self.args)
# Načítanie dát do DataFramu a aplikácia funkcie apply na prepis 1/0 --> zaznacene/nezaznacene
# Load the height table and recode the `zaznacene` flag column:
# 1 becomes 'zaznacene', every other value becomes 'nezaznacene'.
height_csv = '/home/zuzka/Desktop/statistics/import_clevermaps.csv'
df_height = pd.read_csv(height_csv, sep=',')
df_height['zaznacene'] = df_height['zaznacene'].map(
    lambda flag: 'zaznacene' if flag == 1 else 'nezaznacene'
)
df_height
# Horizontal box plots of the three height columns, one trace per column.
box_columns = ('vyska_odhad_pocty_bytu', 'vyska_odhad_pocty_pater', 'vyska_kompletni')
x0, x1, x2 = (df_height[column].to_list() for column in box_columns)

fig = go.Figure()
# Passing the samples as `x` (not `y`) lays the boxes out horizontally.
for label, samples in zip(box_columns, (x0, x1, x2)):
    fig.add_trace(go.Box(x=samples, name=label))
fig.show()
# Summary statistics (`describe`) of each height column as a two-column table.
charakteristika_patra, charakteristika_byty, charakteristika_komplet = (
    df_height[column].describe().reset_index()
    for column in (
        'vyska_odhad_pocty_pater',
        'vyska_odhad_pocty_bytu',
        'vyska_kompletni',
    )
)
# `display` receives the variable *names*; it evaluates them itself.
display('charakteristika_patra', 'charakteristika_byty', 'charakteristika_komplet')
# Histogram tables: bin each height column and count buildings (`kod`) per bin.
# Edges are 2, 4, ..., 30 plus one wide last bin up to 85; values that fall
# outside the edges get no bin (NaN) and are therefore not counted.
height_bin_edges = list(range(2, 32, 2)) + [85]

# observed=False is spelled out so that empty bins stay in the result with a
# count of 0. The three tables then always have identical rows, which the
# grouped bar chart built from them relies on.
df_height['vyska_odhad_pocty_bytu_bins'] = pd.cut(
    x=df_height['vyska_odhad_pocty_bytu'], bins=height_bin_edges)
df_height_hist_byty = (
    df_height.groupby('vyska_odhad_pocty_bytu_bins', observed=False)['kod']
    .count()
    .reset_index()
)

df_height['vyska_odhad_pocty_pater_bins'] = pd.cut(
    x=df_height['vyska_odhad_pocty_pater'], bins=height_bin_edges)
df_height_hist_patra = (
    df_height.groupby('vyska_odhad_pocty_pater_bins', observed=False)['kod']
    .count()
    .reset_index()
)

df_height['vyska_kompletni_bins'] = pd.cut(
    x=df_height['vyska_kompletni'], bins=height_bin_edges)
df_height_hist = (
    df_height.groupby('vyska_kompletni_bins', observed=False)['kod']
    .count()
    .reset_index()
)

display('df_height_hist', 'df_height_hist_patra', 'df_height_hist_byty')
# Grouped bar chart comparing the three binned height distributions.
# Each trace takes its x labels from its own histogram table, so labels and
# counts stay paired even if the tables ever differ in their rows.
x_barplot = df_height_hist_patra['vyska_odhad_pocty_pater_bins'].astype('str')

fig = go.Figure(data=[
    go.Bar(
        name='vyska_odhad_pocty_bytu',
        x=df_height_hist_byty['vyska_odhad_pocty_bytu_bins'].astype('str').to_list(),
        y=df_height_hist_byty['kod'].to_list(),
    ),
    go.Bar(
        name='vyska_odhad_pocty_pater',
        x=x_barplot.to_list(),
        y=df_height_hist_patra['kod'].to_list(),
    ),
    go.Bar(
        name='kompletni_vyska',
        x=df_height_hist['vyska_kompletni_bins'].astype('str').to_list(),
        y=df_height_hist['kod'].to_list(),
    ),
])
# Place the bars of one bin next to each other instead of stacking them.
fig.update_layout(barmode='group')
fig.show()
# Apartments per address point, joined with the apartment-based height estimate.
bouild_counts = pd.read_csv('/home/zuzka/Desktop/statistics/pocet_bytu_na_adresni_misto.csv', sep=',')
bouild_counts

# Look up each building's estimated height in df_height by its `kod`;
# codes that are missing from df_height get NaN.
bouild_counts['vyska_odhad_pocty_bytu'] = bouild_counts.kod.map(
    df_height.set_index('kod')['vyska_odhad_pocty_bytu'])
# Intervals of width 4 from 4 to 36, then (36, 50] and (50, 200].
bouild_counts['interval_pocet_bytov'] = pd.cut(
    x=bouild_counts['pocet_bytu_na_adresni_misto'],
    bins=list(range(4, 38, 4)) + [50, 200])
bouild_counts

# observed=False keeps empty intervals in the result (count 0).
byty_groups = bouild_counts.groupby('interval_pocet_bytov', observed=False)
bouild_counts_bar = (
    byty_groups.agg({'kod': 'count', 'vyska_odhad_pocty_bytu': 'sum'})
    .reset_index()
    .rename(columns={'kod': 'count'})
)
# Average over the buildings that actually have a height. Dividing the sum by
# the row count would treat every building without a matched height as 0.
bouild_counts_bar['average_height'] = byty_groups['vyska_odhad_pocty_bytu'].mean().to_numpy()
bouild_counts_bar['interval_pocet_bytov'] = bouild_counts_bar['interval_pocet_bytov'].astype('str')
bouild_counts_bar
# Number of buildings per apartment-count interval; bar colour repeats the
# count and the hover box additionally shows the average height.
fig = px.bar(
    bouild_counts_bar,
    x='interval_pocet_bytov',
    y='count',
    hover_data=['average_height'],
    color='count',
    labels={'interval_pocet_bytov': 'Pocet bytov', 'count': 'Pocet Objektov'},
    height=300,
)
fig.show()
# Average estimated height per apartment-count interval. The hover box shows
# the number of buildings behind each average, as in the per-floor chart.
fig = px.bar(
    bouild_counts_bar,
    x='interval_pocet_bytov',
    y='average_height',
    hover_data=['count'],
    color='average_height',
    labels={'interval_pocet_bytov': 'Pocet bytov', 'average_height': 'Priemerna vyska objektu'},
    height=300,
)
fig.show()
# Building polygons with floor counts and usage type.
floor_counts_polygon = pd.read_csv('/home/zuzka/Desktop/statistics/so_polygon_dwh.csv', sep=',')
floor_counts_polygon.columns
# Take an explicit copy: new columns are assigned to `floor_counts` later, and
# assigning into a plain column subset triggers SettingWithCopyWarning.
floor_counts = floor_counts_polygon[['pocet_podlazi', 'kod']].copy()
floor_counts
# Look up the floor-based height estimate by `kod`. The source column must be
# `vyska_odhad_pocty_pater`; reading `vyska_odhad_pocty_bytu` here would put
# the apartment-based estimate under the floor-based name.
floor_counts['vyska_odhad_pocty_pater'] = floor_counts.kod.map(
    df_height.set_index('kod')['vyska_odhad_pocty_pater'])
# Unit-width intervals from 0 to 9, then (9, 12] and (12, 50].
floor_counts['interval_pocet_pater'] = pd.cut(
    x=floor_counts['pocet_podlazi'],
    bins=list(range(0, 10, 1)) + [12, 50])
floor_counts

# observed=False keeps every interval in the result, so the positional
# `iloc[1:]` below always removes the same interval.
pater_groups = floor_counts.groupby('interval_pocet_pater', observed=False)
floor_counts_bar = (
    pater_groups.agg({'kod': 'count', 'vyska_odhad_pocty_pater': 'sum'})
    .reset_index()
    .rename(columns={'kod': 'count'})
)
# Average over the buildings that actually have a height; polygons whose
# `kod` is not in df_height carry NaN and must not count as height 0.
floor_counts_bar['average_height'] = pater_groups['vyska_odhad_pocty_pater'].mean().to_numpy()
floor_counts_bar['interval_pocet_pater'] = floor_counts_bar['interval_pocet_pater'].astype('str')
floor_counts_bar
# Drop the first interval, (0, 1].
# NOTE(review): the reason for excluding it is not visible here - confirm.
floor_counts_bar = floor_counts_bar.iloc[1:]
# Number of buildings per floor-count interval; bar colour repeats the count
# and the hover box additionally shows the average height.
fig = px.bar(
    floor_counts_bar,
    x='interval_pocet_pater',
    y='count',
    hover_data=['average_height'],
    color='count',
    labels={'interval_pocet_pater': 'Pocet pater', 'count': 'Pocet Objektov'},
    height=300,
)
fig.show()
# Average estimated height per floor-count interval; the hover box shows the
# number of buildings behind each average.
fig = px.bar(
    floor_counts_bar,
    x='interval_pocet_pater',
    y='average_height',
    hover_data=['count'],
    color='average_height',
    labels={'interval_pocet_pater': 'Pocet pater', 'average_height': 'Priemerna vyska objektov'},
    height=300,
)
fig.show()
# Buildings per usage type (`zpusob_vyuziti`), most frequent first.
usage_counts = floor_counts_polygon.groupby('zpusob_vyuziti')['kod'].count().reset_index()
bar_zpusob_vyuziti = usage_counts.sort_values(by='kod', ascending=False)
bar_zpusob_vyuziti

buildings = bar_zpusob_vyuziti['zpusob_vyuziti'].to_list()
usage_bar = go.Bar(x=buildings, y=bar_zpusob_vyuziti['kod'].to_list())
fig = go.Figure([usage_bar])
fig.show()
# Keep only the rows flagged 'zaznacene'. The mask is built from the
# `zaznacene` column alone; comparing `df_height.values` would test every cell
# of every column and index the frame with a 2-D mask.
zaznacene_chyba = df_height[df_height['zaznacene'] == 'zaznacene']
# Explicit copy: error columns are added to `err_analysis` later, and
# assigning into a plain column subset triggers SettingWithCopyWarning.
err_analysis = zaznacene_chyba[['kod', 'vyska_zmerena', 'vyska_odhad_pocty_bytu',
                                'vyska_odhad_pocty_pater', 'zpusob_vyuziti']].copy()
err_analysis
# Absolute error of each estimate, expressed in percent of the measured height.
measured = err_analysis['vyska_zmerena']
one_percent = measured / 100
err_analysis['diff_zmerana_byty_%'] = (
    (measured - err_analysis['vyska_odhad_pocty_bytu']) / one_percent
).abs()
err_analysis['diff_zmerana_patra_%'] = (
    (measured - err_analysis['vyska_odhad_pocty_pater']) / one_percent
).abs()
# Five usage types with the smallest summed floor-based error.
(
    err_analysis.groupby('zpusob_vyuziti')['diff_zmerana_patra_%']
    .sum()
    .reset_index()
    .sort_values('diff_zmerana_patra_%', ascending=True)
    .head()
)
# Per usage type: number of buildings, summed percent errors and mean errors.
usage_groups = err_analysis.groupby('zpusob_vyuziti')
grouped_err = (
    usage_groups.agg({'kod': 'count',
                      'diff_zmerana_byty_%': 'sum',
                      'diff_zmerana_patra_%': 'sum'})
    .reset_index()
    .rename(columns={'kod': 'count'})
)
grouped_err
# Mean over the rows that have an error value. Dividing the sum by the row
# count would treat rows with a missing estimate as an error of 0.
grouped_err['priemerna_chyba_byty'] = usage_groups['diff_zmerana_byty_%'].mean().to_numpy()
grouped_err['priemerna_chyba_patra'] = usage_groups['diff_zmerana_patra_%'].mean().to_numpy()
grouped_err
# Histogram tables for the flagged buildings: bin both estimates and the
# measured height with the same edges and count buildings (`kod`) per bin.
err_bin_edges = list(range(2, 32, 2)) + [85]

# observed=False is spelled out so that empty bins stay in the result with a
# count of 0; the charts built from these tables pair them row by row.
err_analysis['intervaly_vysky_byty'] = pd.cut(
    x=err_analysis['vyska_odhad_pocty_bytu'], bins=err_bin_edges)
df_height_byty = (
    err_analysis.groupby('intervaly_vysky_byty', observed=False)['kod']
    .count()
    .reset_index()
)
df_height_byty

err_analysis['intervaly_vysky_patra'] = pd.cut(
    x=err_analysis['vyska_odhad_pocty_pater'], bins=err_bin_edges)
df_height_patra = (
    err_analysis.groupby('intervaly_vysky_patra', observed=False)['kod']
    .count()
    .reset_index()
)
df_height_patra

err_analysis['intervaly_vysky_komplet'] = pd.cut(
    x=err_analysis['vyska_zmerena'], bins=err_bin_edges)
df_height_komplet = (
    err_analysis.groupby('intervaly_vysky_komplet', observed=False)['kod']
    .count()
    .reset_index()
)
df_height_komplet
# Measured-height distribution (line) against the floor-based estimate (bars).
x_barplot = df_height_komplet['intervaly_vysky_komplet'].astype('str')

fig = go.Figure()
fig.add_trace(
    go.Scatter(
        x=x_barplot.to_list(),
        y=df_height_komplet['kod'].to_list(),
        name='Vyska zmerana',
    )
)
# The bars take their labels from their own table so that every count is
# paired with the interval it was computed for.
fig.add_trace(
    go.Bar(
        x=df_height_patra['intervaly_vysky_patra'].astype('str').to_list(),
        y=df_height_patra['kod'].to_list(),
        name='Vysky podla pater',
    )
)
fig.show()
# Measured-height distribution (line) against the apartment-based estimate (bars).
fig = go.Figure()
fig.add_trace(
    go.Scatter(
        x=x_barplot.to_list(),
        y=df_height_komplet['kod'].to_list(),
        name='Vyska zmerana',
    )
)
# The bars take their labels from their own table so that every count is
# paired with the interval it was computed for.
fig.add_trace(
    go.Bar(
        x=df_height_byty['intervaly_vysky_byty'].astype('str').to_list(),
        y=df_height_byty['kod'].to_list(),
        name='Vysky podla bytov',
    )
)
fig.show()